In [1]:
import pandas as pd
# Load the CSV file
file_path = r'C:\Users\Dell\Desktop\Trading Analysis\adani.csv' # Replace with your actual file path
data = pd.read_csv(file_path)
# Display the first few rows of the dataset
print(data.head())
timestamp symbol company open \
0 1025461800000000000 ACC ACC Limited 107.789
1 1025461800000000000 AMBUJACEM Ambuja Cements Limited 17.278
2 1025461800000000000 ADANIENT Adani Enterprises Limited -0.010
3 1025548200000000000 ACC ACC Limited 108.496
4 1025548200000000000 AMBUJACEM Ambuja Cements Limited 17.573
high low close volume dividends stock_splits
0 109.810 107.115 108.968 659631 0.0 0.0
1 17.660 17.202 17.573 630442 0.0 0.0
2 -0.011 -0.010 -0.010 1080397 0.0 0.0
3 110.046 107.789 108.059 282660 0.0 0.0
4 17.748 17.409 17.560 1007265 0.0 0.0
In [2]:
data
Out[2]:
| timestamp | symbol | company | open | high | low | close | volume | dividends | stock_splits | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1025461800000000000 | ACC | ACC Limited | 107.789 | 109.810 | 107.115 | 108.968 | 659631 | 0.0 | 0.0 |
| 1 | 1025461800000000000 | AMBUJACEM | Ambuja Cements Limited | 17.278 | 17.660 | 17.202 | 17.573 | 630442 | 0.0 | 0.0 |
| 2 | 1025461800000000000 | ADANIENT | Adani Enterprises Limited | -0.010 | -0.011 | -0.010 | -0.010 | 1080397 | 0.0 | 0.0 |
| 3 | 1025548200000000000 | ACC | ACC Limited | 108.496 | 110.046 | 107.789 | 108.059 | 282660 | 0.0 | 0.0 |
| 4 | 1025548200000000000 | AMBUJACEM | Ambuja Cements Limited | 17.573 | 17.748 | 17.409 | 17.560 | 1007265 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 31492 | 1677090600000000000 | AMBUJACEM | Ambuja Cements Limited | 336.000 | 343.850 | 331.350 | 336.900 | 10662112 | 0.0 | 0.0 |
| 31493 | 1677090600000000000 | ADANIPORTS | Adani Ports and Special Economic Zone Limited | 539.500 | 558.150 | 533.650 | 551.850 | 10709730 | 0.0 | 0.0 |
| 31494 | 1677090600000000000 | ADANIENT | Adani Enterprises Limited | 1380.000 | 1438.000 | 1350.000 | 1382.650 | 8904676 | 0.0 | 0.0 |
| 31495 | 1677090600000000000 | ATGL | Adani Total Gas Limited | 791.350 | 791.350 | 791.350 | 791.350 | 51867 | 0.0 | 0.0 |
| 31496 | 1677090600000000000 | NDTV | New Delhi Television Limited | 195.250 | 201.700 | 193.300 | 199.100 | 228676 | 0.0 | 0.0 |
31497 rows × 10 columns
In [4]:
# Summary statistics
summary_stats = data.describe()
print(summary_stats)
timestamp open high low close \
count 3.149700e+04 31497.000000 31497.000000 31497.000000 31497.000000
mean 1.409762e+18 404.542261 411.745505 396.689603 404.171407
std 1.817938e+17 623.108375 633.783423 610.711441 622.359535
min 1.025462e+18 -0.011000 -0.011000 -0.011000 -0.011000
25% 1.267036e+18 59.423000 60.773000 58.000000 59.238000
50% 1.434911e+18 140.100000 143.201000 137.100000 139.993000
75% 1.569868e+18 384.277000 392.376000 377.043000 384.034000
max 1.677091e+18 4175.000000 4236.750000 4066.400000 4165.300000
volume dividends stock_splits
count 3.149700e+04 31497.000000 31497.000000
mean 3.594056e+06 0.017824 0.000381
std 7.932287e+06 0.530637 0.041405
min 0.000000e+00 0.000000 0.000000
25% 3.934800e+05 0.000000 0.000000
50% 1.347016e+06 0.000000 0.000000
75% 3.874840e+06 0.000000 0.000000
max 2.421999e+08 58.000000 5.000000
In [12]:
import matplotlib.pyplot as plt
import seaborn as sns
# Set the aesthetic style of the plots
sns.set(style="whitegrid")
# Plot distribution of key numerical variables
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(14, 18))
# Plot distributions
sns.histplot(data['open'], kde=True, ax=axes[0, 0])
axes[0, 0].set_title('Distribution of Opening Prices')
sns.histplot(data['high'], kde=True, ax=axes[0, 1])
axes[0, 1].set_title('Distribution of High Prices')
sns.histplot(data['low'], kde=True, ax=axes[1, 0])
axes[1, 0].set_title('Distribution of Low Prices')
sns.histplot(data['close'], kde=True, ax=axes[1, 1])
axes[1, 1].set_title('Distribution of Closing Prices')
sns.histplot(data['volume'], kde=True, ax=axes[2, 0])
axes[2, 0].set_title('Distribution of Volume')
sns.histplot(data['dividends'], kde=True, ax=axes[2, 1])
axes[2, 1].set_title('Distribution of Dividends')
plt.tight_layout()
plt.show()
In [4]:
import matplotlib.pyplot as plt
import seaborn as sns
# Select only numeric columns
numeric_data = data.select_dtypes(include=['float64', 'int64'])
# Calculate correlation matrix
correlation_matrix = numeric_data.corr()
# Plot heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', linewidths=0.5)
plt.title('Correlation Matrix Heatmap')
plt.show()
The heatmap in the provided file visualizes the correlations between various stock attributes like open, high, low, close, and volume. It uses a color gradient to represent the strength and direction of these correlations, where darker colors indicate stronger relationships and lighter colors indicate weaker ones. This visualization helps identify patterns, such as a strong positive correlation between high and close prices, indicating that higher highs often coincide with higher closing prices. Additionally, the heatmap can reveal how trading volume correlates with price movements, aiding in understanding the stock's behavior and informing trading strategies.¶
In [8]:
# Pair plot to visualize relationships between variables
sns.pairplot(data[['open', 'high', 'low', 'close', 'volume']])
plt.show()
In [9]:
# Check for missing values
missing_values = data.isnull().sum()
print("Missing values in each column:\n", missing_values)
Missing values in each column: timestamp 0 symbol 0 company 0 open 0 high 0 low 0 close 0 volume 0 dividends 0 stock_splits 0 dtype: int64
TIME SERIES ANALYSIS
In [10]:
# Convert timestamp to datetime
data['timestamp'] = pd.to_datetime(data['timestamp'], unit='ns')
# Plot time series for opening prices
plt.figure(figsize=(14, 7))
sns.lineplot(x='timestamp', y='open', data=data)
plt.title('Opening Prices Over Time')
plt.xlabel('Time')
plt.ylabel('Opening Price')
plt.show()
BOX PLOTS TO DIRECT OUTLIERS
In [14]:
# Box plots for price variables
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 10))
sns.boxplot(data['open'], ax=axes[0, 0])
axes[0, 0].set_title('Box Plot of Opening Prices')
sns.boxplot(data['high'], ax=axes[0, 1])
axes[0, 1].set_title('Box Plot of High Prices')
sns.boxplot(data['low'], ax=axes[1, 0])
axes[1, 0].set_title('Box Plot of Low Prices')
sns.boxplot(data['close'], ax=axes[1, 1])
axes[1, 1].set_title('Box Plot of Closing Prices')
plt.tight_layout()
plt.show()
Volatility Analysis¶
In [5]:
# Calculate daily price change (close - open)
data['price_change'] = data['close'] - data['open']
# Plot price change
plt.figure(figsize=(6, 3))
sns.histplot(data['price_change'], kde=True)
plt.title('Distribution of Daily Price Change')
plt.xlabel('Daily Price Change')
plt.ylabel('Frequency')
plt.show()
# Plot time series of daily price change
plt.figure(figsize=(10, 3))
sns.lineplot(x='timestamp', y='price_change', data=data)
plt.title('Daily Price Change Over Time')
plt.xlabel('Time')
plt.ylabel('Daily Price Change')
plt.show()
Candlestick Chart: Provides a detailed view of stock price movements, showing the opening, closing, high, and low prices for each time period.¶
In [6]:
import plotly.graph_objects as go
# Create the candlestick chart
fig = go.Figure(data=[go.Candlestick(x=data['timestamp'],
open=data['open'],
high=data['high'],
low=data['low'],
close=data['close'])])
fig.update_layout(title='Candlestick Chart', xaxis_title='Time', yaxis_title='Price')
fig.show()
Moving Averages: Smooth out short-term price fluctuations and help identify the direction of the trend¶
In [7]:
# Calculate moving averages
data['MA20'] = data['close'].rolling(window=20).mean()
data['MA50'] = data['close'].rolling(window=50).mean()
# Plot moving averages
plt.figure(figsize=(14, 7))
plt.plot(data['timestamp'], data['close'], label='Closing Price')
plt.plot(data['timestamp'], data['MA20'], label='20-Day MA')
plt.plot(data['timestamp'], data['MA50'], label='50-Day MA')
plt.title('Moving Averages')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()
Daily Returns: Show the percentage change in stock price from one day to the next, useful for understanding volatility.¶
In [8]:
# Calculate daily returns
data['daily_return'] = data['close'].pct_change()
# Plot daily returns
plt.figure(figsize=(14, 7))
sns.lineplot(x='timestamp', y='daily_return', data=data)
plt.title('Daily Returns')
plt.xlabel('Time')
plt.ylabel('Daily Return')
plt.show()
# Plot distribution of daily returns
plt.figure(figsize=(14, 7))
sns.histplot(data['daily_return'].dropna(), kde=True)
plt.title('Distribution of Daily Returns')
plt.xlabel('Daily Return')
plt.ylabel('Frequency')
plt.show()
ollinger Bands: Help identify overbought or oversold conditions based on volatility.¶
In [9]:
# Calculate Bollinger Bands
data['MA20'] = data['close'].rolling(window=20).mean()
data['stddev'] = data['close'].rolling(window=20).std()
data['upper_band'] = data['MA20'] + (data['stddev'] * 2)
data['lower_band'] = data['MA20'] - (data['stddev'] * 2)
# Plot Bollinger Bands
plt.figure(figsize=(14, 7))
plt.plot(data['timestamp'], data['close'], label='Closing Price')
plt.plot(data['timestamp'], data['MA20'], label='20-Day MA')
plt.plot(data['timestamp'], data['upper_band'], label='Upper Band')
plt.plot(data['timestamp'], data['lower_band'], label='Lower Band')
plt.fill_between(data['timestamp'], data['upper_band'], data['lower_band'], alpha=0.2)
plt.title('Bollinger Bands')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()
Volume by Price: Shows the total trading volume at different price levels, highlighting areas of significant trading activity.¶
In [10]:
import numpy as np
# Define price bins
bins = np.linspace(data['low'].min(), data['high'].max(), 50)
data['price_bin'] = pd.cut(data['close'], bins)
# Calculate volume by price bin
volume_by_price = data.groupby('price_bin')['volume'].sum()
# Plot volume by price
plt.figure(figsize=(14, 7))
volume_by_price.plot(kind='barh')
plt.title('Volume by Price')
plt.xlabel('Volume')
plt.ylabel('Price Bin')
plt.show()
C:\Users\Dell\AppData\Local\Temp\ipykernel_1292\47422830.py:8: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
In [ ]: